frames reset
program run_script	
	procedure_level_analysis
	npi_level_analysis
end

program procedure_level_analysis
	frame create table_rvu
	#d ; 
	frame table_rvu { ; set obs 100 ; g depvar = "" ; g indepvar = "" ; g agerange = "" ; g estimate = . ; g se = . ; g min95 = . ; g max95 =. ; g mean_depvar = . ;	g sd_depvar = . ; g mean_indepvar = . ; g sd_indepvar = . ; g obs = . ; g level = "procedure-level" ; g N_UR = . ; } ; 
	#d cr

	loc i = 1
	foreach agerange in age40to55 age56to70 {
	
	use "${intermediate_data}/RVU/npi_hcpcs_pos_panel.dta", clear
	merge m:1 npi year using "${temp}/npi_list_`agerange'.dta" , keep(match) nogen

	gen log_revenue_hcpcs_pos=log(revenue_hcpcs_pos)
	gen log_rvus=log(total_rvu_hcpcs_pos)
	gen log_lines=log(line_srvc_cnt)
	gen log_patients=log(bene_unique_cnt)
	gen log_rvu_value=log(rvu_value_hcpcs_pos)
	
	gegen revenue_doc_yr=total(revenue_hcpcs_pos), by(npi year) 
	egen weights=mean(revenue_doc_yr), by(npi)
	
	* Regressions

	foreach outcome of varl log_rvus log_revenue_hcpcs_pos log_patients log_lines {
	
		reghdfe `outcome' log_rvu_value if inrange(year, 2012, 2017) [aw=weights], absorb(cms_spec_code_num#year age npi hcpcs)  cluster(cms_spec_code_num)

		loc coeff = e(b)[1,1]
		loc se = sqrt(e(V)[1,1])
		loc max95 = e(b)[1,1] - invttail(e(df_r),.975) * sqrt(e(V)[1,1])
		loc min95 = e(b)[1,1] + invttail(e(df_r),.975) * sqrt(e(V)[1,1])
		loc obs = e(N)
		gen sample = (e(sample)==1)
		
		qui sum `outcome' [aw=weights] if sample
			loc meandepv = `r(mean)'
			loc sddepv = `r(sd)'
			
		qui sum log_rvu_value [aw=weights] if sample
			loc meanindepv = `r(mean)'
			loc sdindepv = `r(sd)'
			
		frame table_rvu {
			replace depvar = "`outcome'" if _n == `i' & depvar == ""
			replace indepvar = "log_rvu_value" if _n == `i' & indepvar == ""
			replace agerange = "`agerange'" if _n == `i' & agerange == ""
			replace estimate = `coeff' if _n == `i' & estimate == .
			replace se = `se' if _n == `i' & se == .
			replace min95 = `min95' if _n == `i' & min95 == .
			replace max95 = `max95' if _n == `i' & max95 == .
			replace mean_depvar = `meandepv' if _n == `i' & mean_depvar == .
			replace sd_depvar = `sddepv'  if _n == `i' & sd_depvar == .
			replace mean_indepvar = `meanindepv' if _n == `i' & mean_indepvar == .
			replace sd_indepvar = `sdindepv' if _n == `i' & sd_indepvar == .
			replace obs = `obs' if _n == `i' & obs == .
			replace N_UR = `obs' if _n == `i' & N_UR == .
			loc ++i
		}
		
		drop sample 
	}
	}

end

program  npi_level_analysis
	
	frame table_rvu { 
		replace level = "npi-level" if depvar == ""
		count if !missing(depvar)
		loc i = `r(N)' + 1
	} 

	foreach agerange in age40to55 age56to70 {

		if "`agerange'"=="age40to55" {
			use if inrange(age,40,55) using "${clean_data}/panel_physicians_clean.dta", clear	
		}
		
		else if "`agerange'"=="age56to70" {
			use if inrange(age,56,70) using "${clean_data}/panel_physicians_clean.dta", clear	
		}
	
	merge 1:1 npi year using "${intermediate_data}/RVU/rvus_instrument.dta", keep(match) nogen
	merge 1:1 npi year using "${intermediate_data}/RVU/npi_level_mpupd_panel.dta", keep(match) nogen
	
	gen log_medicare_revenue=log(revenue_doc_yr)
	gen log_rvus=log(total_rvu_doc_yr)
	gen log_lines=log(lines_doc_yr)
	gen log_maxpatients=log(maxpatient_doc_yr)	
	gen log_unique_procedures=log(unique_hcpcs_pos_doc_yr)
	
	gen log_rvus_iv_atfixq=log(rvu_atfixq_doc_yr)

	encode cms_spec_code, gen(cms_spec_code_num)
	
	egen weights=mean(revenue_doc_yr), by(npi) // Weights: average total NPI level of medicare revenue across all years
	
	xtset npi year
	
	* Regressions 

	loc j = 0
	loc indepvar = "log_rvus_iv_atfixq"
	foreach outcome of varl log_medicare_revenue log_rvus log_lines log_maxpatients log_unique_procedures logptotinc retired_1099ssa logptotinc retired_1099ssa {
		
		reghdfe `outcome' log_rvus_iv_atfixq if inrange(year, 2012, 2017) [aw=weights], absorb(cms_spec_code_num#year age npi)  cluster(cms_spec_code)

		
		if `j' == 2 { // IV income and RVUs 
			loc indepvar = "log_rvus"
		 	ivreghdfe logptotinc (log_rvus=log_rvus_iv_atfixq)  if inrange(year, 2012, 2017) [aw=weights], absorb(cms_spec_code_num#year age npi) cluster(cms_spec_code_num)		
		}
		if `j' == 3 { // IV retirement and income 	
			loc indepvar = "logptotinc"
			ivreghdfe retired_1099ssa (logptotinc=log_rvus_iv_atfixq)  if inrange(year, 2012, 2017) [aw=weights], absorb(cms_spec_code_num#year age npi)  cluster(cms_spec_code_num)
		}
		
		loc coeff = e(b)[1,1]
		loc se = sqrt(e(V)[1,1])
		loc max95 = e(b)[1,1] - invttail(e(df_r),.975) * sqrt(e(V)[1,1])
		loc min95 = e(b)[1,1] + invttail(e(df_r),.975) * sqrt(e(V)[1,1])
		loc obs = e(N)
		gen sample = (e(sample)==1)
		
		qui sum `outcome' [aw=weights] if sample
			loc meandepv = `r(mean)'
			loc sddepv = `r(sd)'
			
		qui sum `indepvar' [aw=weights] if sample
			loc meanindepv = `r(mean)'
			loc sdindepv = `r(sd)'
			
		frame table_rvu {
			replace depvar = "`outcome'" if _n == `i' & depvar == ""
			replace indepvar = "`indepvar'" if _n == `i' & indepvar == ""
			replace agerange = "`agerange'" if _n == `i' & agerange == ""
			replace estimate = `coeff' if _n == `i' & estimate == .
			replace se = `se' if _n == `i' & se == .
			replace min95 = `min95' if _n == `i' & min95 == .
			replace max95 = `max95' if _n == `i' & max95 == .
			replace mean_depvar = `meandepv' if _n == `i' & mean_depvar == .
			replace sd_depvar = `sddepv'  if _n == `i' & sd_depvar == .
			replace mean_indepvar = `meanindepv' if _n == `i' & mean_indepvar == .
			replace sd_indepvar = `sdindepv' if _n == `i' & sd_indepvar == .
			replace obs = `obs' if _n == `i' & obs == .
			replace N_UR = `obs' if _n == `i' & N_UR == .
			loc ++i
		}
		
		if inlist("`outcome'","logptotinc","retired_1099ssa") {
			loc ++j
		}
		
		drop sample 
	}

	* Visuals for income IV

	if "`agerange'"=="age40to55" {
				
		gen change_rvus=log_rvus_iv_atfixq-L.log_rvus_iv_atfixq
	
		// Output data for companion table
		#d ;
			frame create fig_rvu ; frame fig_rvu { ; set obs 100 ; g var = "" ; g mean = . ; g sd = . ; g aux = 1 ; g N_UR_stats = . ; } ; 
		#d cr
		
		g aux = 1 
		
		loc k = 1
		foreach outcome of varl log_rvus_iv_atfixq rvu_atfixq_doc_yr change_rvus {
			qui sum `outcome'
			loc mean = `r(mean)'
			loc sd = `r(sd)'
			loc N_UR_stats = `r(N)'
			frame fig_rvu {
				replace var = "_`outcome'" if _n == `k' & var == ""
				replace mean = `mean' if _n == `k' & mean == .
				replace sd = `sd' if _n == `k' & sd == .
				replace N_UR_stats = `N_UR_stats' if _n == `k' & N_UR_stats == .
			}
			loc ++k
		}
		
		frame fig_rvu {
			drop if var == ""
			drbest_docinc mean sd 
			reshape wide mean sd N_UR_stats, i(aux) j(var) string
		}
		
		* Add data for kdensity 
		egen perc = xtile(change_rvus), nq(100)
		foreach perc in 5 25 50 75 95 {
			qui su change_rvus if perc == `perc'  
			assert `r(N)' > 10
			
			g p`perc' =  `r(mean)'
			g N_UR_p`perc' = `r(N)'
		} 
		drop perc
		
		qui su change_rvus, d
			loc p1 = `r(p1)'
			loc p99 = `r(p99)'		
			g N_UR_kdensity = `r(N)'
			
		kdensity change_rvus, nograph k(gaussian) n(200) generate(x_change_rvus y_change_rvus)
		replace x_change_rvus = . if !inrange(x_change_rvus, `p1', `p99')		
		replace y_change_rvus = . if !inrange(x_change_rvus, `p1', `p99')		
		keep if !missing(x_change_rvus)
		
		keep x_change_rvus y_change_rvus aux p5 p25 p50 p75 p95 N_UR_*
		
		frlink m:1 aux, frame(fig_rvu) gen(link)
		frget mean* sd* N_UR_stats*, from(link)
		
		* Save data for histogram
		ren (x_change_rvus y_change_rvus) (rvu_chg frac)
		drbest_docinc rvu_chg frac p5 p25 p50 p75 p95
		drop aux link
		order N_UR*, last
		
		g N_UR_stats = . 
		loc k = 1
		foreach outcome in log_rvus_iv_atfixq rvu_atfixq_doc_yr change_rvus {
			replace mean_`outcome' = . if _n != `k'
			replace sd_`outcome' = . if _n != `k'
			replace N_UR_stats = N_UR_stats_`outcome' if _n == `k'
			drop N_UR_stats_`outcome'
			loc ++k
		}
		order N_UR*, last
		export delimited using "${mypath}/intermediate_csv/govtpolicy01-kden_changes_instrument.csv", replace dataf delim(tab) 
	}
	}
	
	* Save data for main regression table and coefplot

	frame change table_rvu
	drop if depvar == ""
	keep if inlist(depvar,"logptotinc","log_rvus","log_unique_procedures","log_rvus","log_patients","retired_1099ssa") 
	drop if depvar=="retired_1099ssa" & indepvar == "log_rvus_iv_atfixq"

	drbest_docinc estimate se min95 max95 mean_depvar sd_depvar mean_indepvar sd_indepvar 
	drbcount obs, replace
	order N_UR, last
	export delimited using "${mypath}/intermediate_csv/govtpolicy01-RVU_Regressions.csv", replace dataf delim(tab) 

end

run_script
